import pandas as pd
import jsonlines
import start


# %%
# Load the gold classification workbook and keep only the rows whose
# "set" column equals "train".
df = pd.read_excel(start.MAIN_DIR + "data/clean/character_classifications_gold.xlsx")
df = df[df["set"] == "train"]

# sheet_name=None makes read_excel return every sheet as a
# {sheet name: DataFrame} mapping, so one sheet can be picked by name below.
prompts_file = start.MAIN_DIR + "prompts_characterization.xlsx"
prompts = pd.read_excel(prompts_file, sheet_name=None)

SHEET_NAME = "Zero Shot 1"

# Turn each row of the chosen sheet into a {"role", "content"} message dict,
# preserving the row order of the sheet.
sheet_data = prompts[SHEET_NAME]
prompt = [
    {"role": role, "content": content}
    for role, content in zip(sheet_data["role"], sheet_data["content"])
]

# %%
# Build one record per training row: the first prompt message, followed by
# the row's text as the user turn and its gold label as the assistant turn.
# NOTE(review): only prompt[0] is used — presumably the system message of
# the selected sheet; confirm against the workbook.
examples = [
    {
        "messages": [
            prompt[0],
            {"role": "user", "content": text},
            {"role": "assistant", "content": classification},
        ]
    }
    for text, classification in zip(df.text, df.character_gold)
]

# Write the records out as JSON Lines, one record per line.
# NOTE(review): the file name says "zero_shot_3" while SHEET_NAME above is
# "Zero Shot 1" — confirm this is the intended output name.
output_path = (
    start.MAIN_DIR + "data/clean/character_classifications_zero_shot_3.jsonl"
)
with jsonlines.open(output_path, mode="w") as writer:
    writer.write_all(examples)
